In [1]:
import os
import pandas as pd
csv_files = [file for file in os.listdir() if file.endswith('.csv') and file.startswith('result_df.csv')]

# Loop through the CSV files
for file in csv_files:
    # Read the CSV file
    df = pd.read_csv(file)
    # Assuming df is your original DataFrame
    df = df.iloc[:, :-2].copy()
    print(df.columns)
    print(df.head)f
Index(['port', '1972', '1973', '1974', '1975', '1976', '1977', '1978', '1979',
       '1980', '1981', '1982', '1983', '1984', '1985', '1986', '1987', '1988',
       '1989', '1990', '1991', '1992', '1993', '1994', '1995', '1996', '1997',
       '1998', '1999', '2000'],
      dtype='object')
<bound method NDFrame.head of                        port     1972     1973     1974     1975     1976  \
0               Albany (NY)  10262.0  11328.0   9610.0   9260.0  10146.0   
1               Alpena (MI)   3380.0   2854.0   3082.0   2856.0   2835.0   
2            Anacortes (WA)   3316.0   2896.0   3972.0   4852.0   7492.0   
3            Anchorage (AK)   2058.0   2625.0   2340.0   2936.0   2932.0   
4            Ashtabula (OH)  14684.0  16732.0  16566.0  19192.0  16465.0   
5            Baltimore (MD)  45799.0  53787.0  59891.0  52661.0  52437.0   
6        Barbers Point (HI)   3420.0   3945.0   4360.0   5186.0   6593.0   
7          Baton Rouge (LA)  52903.0  53569.0  59126.0  60226.0  66703.0   
8             Beaumont (TX)  32391.0  34491.0  33504.0  30583.0  43939.0   
9           Bellingham (WA)   2184.0   3565.0   1946.0   1883.0   1761.0   
10               Boston(MA)  26483.0  27057.0  25729.0  24720.0  26172.0   
11          Bridgeport (CT)   3472.0   3554.0   3295.0   2860.0   3265.0   
12         Brownsville (TX)   3875.0   6011.0   2837.0   2829.0   2585.0   
13          Charleston (SC)  46838.0  47381.0  45886.0  42589.0  40575.0   
14       Christiansted (VI)  14684.0  16732.0  16566.0  19192.0  16465.0   
15             Detroit (MI)  37896.0  48158.0  40345.0  33607.0  32655.0   
16     Drummond Island (MI)   1360.0   1283.0    903.0   1218.0   1158.0   
17  Duluth/Superior (MI/WI)   5219.0   4920.0   4321.0   4367.0   4143.0   
18            Freeport (TX)   2824.0   2724.0   2531.0   2608.0   2341.0   
19           Galveston (TX)   1198.0    989.0    821.0    922.0    982.0   
20       Hampton Roads (VA)  71431.0  88518.0  89106.0  83674.0  89898.0   
21                Hilo (HI)    173.0   1109.0    984.0   1007.0   1452.0   
22             Hueneme (CA)   1298.0   1043.0    982.0   1109.0   1276.0   
23        Humboldt Bay (CA)   1468.0   1382.0   1252.0   1549.0   1285.0   
24               Huron (OH)    303.0    386.0    291.0    280.0    264.0   
25        Jacksonville (FL)  17029.0  16505.0  16565.0  17463.0  20221.0   
26              Lorain (OH)  27291.0  30518.0  33154.0  32453.0  35379.0   
27         Marine City (MI)   2836.0   2923.0   2508.0   1945.0   2271.0   
28              Monroe (MI)   2027.0    990.0   1216.0    459.0    200.0   
29            Muskegon (MI)    779.0    931.0    902.0    848.0   1014.0   
30          Nawiliwili (HI)   1115.0   1284.0   1097.0   1023.0   1026.0   
31          New London (CT)   2015.0   2279.0   2239.0   2262.0   2548.0   
32             Olympia (WA)   2849.0   2919.0   2990.0   2727.0   3241.0   
33              Orange (TX)  21708.0  24931.0  27800.0  26598.0  30687.0   
34              Oswego (NY)  11183.0  12542.0  11557.0  10522.0  12021.0   
35          Palm Beach (FL)   4856.0   4527.0   4931.0   4343.0   4256.0   
36          Pascagoula (MS)  30683.0  28844.0  27606.0  27566.0  25374.0   
37        Philadelphia (PA)   2188.0   2315.0   2364.0   2943.0   3143.0   
38         Plaquemines (LA)   3450.0   3741.0   3118.0   3847.0   6841.0   
39               Ponce (PR)   9200.0  10236.0   8856.0   8266.0   8578.0   

        1977      1978      1979      1980  ...      1991      1992      1993  \
0     9591.0    9832.0    9658.0    8712.0  ...    5967.0    5674.0    6053.0   
1     3005.0    3204.0    2942.0    2737.0  ...    2284.0    2486.0    2547.0   
2     8968.0    9965.0    9599.0    9166.0  ...   14481.0   15260.0   13124.0   
3     2267.0    2226.0    1639.0    1752.0  ...    2309.0    2540.0    2470.0   
4    15147.0   17428.0   21478.0   18655.0  ...   10638.0   10519.0    8979.0   
5    44756.0   46809.0   51445.0   50042.0  ...   37745.0   37656.0   37170.0   
6     6631.0    6307.0    6155.0    5726.0  ...    9002.0    8824.0    9357.0   
7    70008.0   74570.0   76703.0   79347.0  ...   87630.0   84699.0   85079.0   
8    48919.0   52770.0   58137.0   52261.0  ...   22383.0   22702.0   25410.0   
9     1718.0    1557.0    2132.0    2047.0  ...    1482.0    1383.0    1223.0   
10   25975.0   26074.0   26343.0   22034.0  ...   18562.0   19208.0   19448.0   
11    3495.0    3736.0    3243.0    3195.0  ...    3268.0    2948.0    2942.0   
12    2130.0    2164.0    2508.0    2570.0  ...    1610.0    1594.0    1735.0   
13   36136.0   39691.0   10056.0    2707.0  ...    9464.0    9650.0    9503.0   
14   15147.0   17428.0   19470.0   32993.0  ...     745.0    1046.0    1202.0   
15   33419.0   45841.0    2047.0   39107.0  ...   14321.0   16303.0   17422.0   
16     955.0    1130.0   47725.0   19268.0  ...     786.0    1496.0    2205.0   
17    4589.0    6064.0     834.0    1489.0  ...   37716.0   39304.0   37679.0   
18    2477.0    2458.0    1382.0     351.0  ...   15666.0   14953.0   14025.0   
19    1095.0    1145.0    5131.0   20131.0  ...   10858.0   12318.0    9755.0   
20  104291.0  111936.0    7464.0    2299.0  ...     944.0    1249.0    1420.0   
21    1876.0     959.0    1189.0    1242.0  ...   86315.0   79064.0   65711.0   
22    1301.0    1922.0   18419.0  108973.0  ...  131514.0  137664.0  141477.0   
23     859.0    1013.0   15278.0    1406.0  ...     416.0     474.0     696.0   
24     318.0     502.0    1473.0    1324.0  ...    2301.0    1311.0     994.0   
25   25401.0   26606.0     448.0   16899.0  ...   13767.0   15326.0   15546.0   
26   35944.0   36261.0    3575.0   30151.0  ...    7806.0    6309.0    5476.0   
27    1912.0    1952.0    2663.0     282.0  ...     929.0     846.0     966.0   
28     536.0     869.0  163621.0    2769.0  ...   41343.0   40482.0   43960.0   
29    1346.0    1216.0     794.0    1235.0  ...    5237.0    4440.0    3999.0   
30     682.0     734.0     632.0     785.0  ...    1358.0    1562.0    1635.0   
31    3105.0    3064.0    1432.0    2530.0  ...    8154.0    8430.0    8740.0   
32    3283.0    4868.0   35317.0     721.0  ...   10356.0   13211.0   12734.0   
33   30754.0   33475.0     480.0     567.0  ...     945.0     714.0     924.0   
34   11828.0   11964.0    5844.0     860.0  ...     849.0     553.0     579.0   
35    3895.0    4678.0   32773.0    1625.0  ...     818.0     582.0     506.0   
36   18326.0   22165.0    4563.0   25434.0  ...    2938.0    2734.0    2586.0   
37    3500.0    3305.0   13262.0   47883.0  ...    4760.0    1690.0    1650.0   
38    5215.0    7957.0   29146.0   38513.0  ...   37257.0   39667.0   42708.0   
39    8624.0    8459.0    3520.0    1145.0  ...   53782.0   58473.0   53110.0   

        1994      1995      1996      1997      1998      1999      2000  
0     6107.0    5803.0    5768.0    6819.0    6723.0    6402.0    6127.0  
1     2672.0    2767.0    2345.0    2901.0    3078.0    3947.0    3405.0  
2    12950.0   13110.0   13844.0   13904.0   11948.0   16232.0   18035.0  
3     2559.0    3222.0    3401.0    3424.0    3589.0    3701.0    3157.0  
4    10368.0   10010.0    9523.0   11929.0   15602.0   10495.0   12322.0  
5    41450.0   44696.0   43552.0   40029.0   40114.0   37287.0   40832.0  
6     9022.0    8233.0    8745.0    8162.0    6658.0    8707.0    7141.0  
7    86246.0   83613.0   81009.0   84023.0   66835.0   63729.0   65631.0  
8    21201.0   20937.0   35705.0   48665.0   60052.0   69406.0   82653.0  
9     1343.0    1291.0    1419.0    1555.0    1244.0    1526.0     541.0  
10   18870.0   16744.0   20104.0   20893.0   21222.0   22171.0   20751.0  
11    3054.0    3447.0    4862.0    5340.0    4626.0    4154.0    4255.0  
12    3396.0    2656.0    2401.0    2284.0    2799.0    2487.0    3268.0  
13   10830.0   11172.0    3567.0    3147.0    3795.0    3916.0    4247.0  
14    1543.0     287.0   27886.0   24868.0   25958.0   26602.0   23929.0  
15   18718.0   18661.0   80460.0   86844.0   86180.0   77987.0   83125.0  
16    1474.0    1606.0   18604.0   18135.0   19454.0   16948.0   17295.0  
17   41819.0   45049.0    1682.0    1560.0    1582.0    1732.0    1358.0  
18   17450.0   19662.0     648.0     533.0     572.0     644.0     630.0  
19   10257.0   10465.0   24571.0   26281.0   29014.0   28076.0   30985.0  
20    1842.0    2023.0    1990.0    1486.0    1488.0    1406.0    1251.0  
21   62746.0   72508.0    2176.0    2130.0    2353.0    2333.0    1551.0  
22  143663.0  135231.0   12010.0   12704.0   13723.0   12259.0   15797.0  
23     814.0    1076.0    1395.0    1362.0    1183.0    1330.0    1024.0  
24    1407.0    1220.0  148183.0  165456.0  169070.0  158828.0  191419.0  
25   16145.0   15700.0    1197.0    1175.0    1072.0    1302.0    1063.0  
26    6442.0    6167.0   49100.0   51279.0   53568.0   50742.0   70150.0  
27    1093.0    1150.0   15978.0   15955.0   14166.0   12968.0   48192.0  
28   44997.0   50972.0     395.0    1142.0    1282.0    1357.0     399.0  
29    4195.0    4620.0    2858.0    6612.0    7392.0    7723.0    3539.0  
30    2004.0    1825.0   50864.0    3265.0    3108.0    3531.0   54157.0  
31    9471.0    8813.0    2172.0    5201.0    5260.0    4636.0    2435.0  
32   12914.0   13224.0    2049.0    9594.0    9193.0    8687.0    1771.0  
33    1514.0    2166.0   83726.0    1915.0    2036.0    1734.0   90768.0  
34     686.0     693.0  131601.0   89442.0   88768.0   87511.0  138670.0  
35     474.0     435.0   11230.0  135266.0  137544.0  133715.0   12176.0  
36    2503.0    2891.0     616.0    1997.0    1423.0    1313.0     681.0  
37    1466.0    1623.0    2294.0     640.0     471.0     805.0    2950.0  
38   40746.0   40634.0    3124.0    2922.0    3149.0    3352.0    2573.0  
39   64759.0   72897.0   29343.0    2878.0    2683.0    2491.0   28710.0  

[40 rows x 30 columns]>
In [31]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import PolynomialFeatures
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
import matplotlib.pyplot as plt

# List to store models and metrics
train_r2_scores = []
validation_r2_scores = []
train_mse_scores = []
validation_mse_scores = []
train_rmse_scores = []
validation_rmse_scores = []
train_mae_scores = []
validation_mae_scores = []

# List to store baseline metrics
baseline_r2_scores = []
baseline_mse_scores = []
baseline_rmse_scores = []
baseline_mae_scores = []
port_values = []

# Iterate through each row in the DataFrame
for index, row in df.iterrows():
    port_value = row['port']  # Get 'port' column value for the current row
    port_values.append(port_value) 
    # Extract features (excluding first column -- years) target -- all data points
    X = np.array(row.index[1:]).reshape(-1, 1)  # Features using row indices
    y = np.array(row.values[1:]).reshape(-1, 1)  # Target using row values

    # Identify and remove outliers using IQR method
    Q1 = np.percentile(y, 25)
    Q3 = np.percentile(y, 75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    
    filtered_indices = np.where((y >= lower_bound) & (y <= upper_bound))[0]
    X = X[filtered_indices]
    y = y[filtered_indices]

    # Create polynomial features
    degree = 3 
    poly = PolynomialFeatures(degree=degree)
    X_poly = poly.fit_transform(X)

    # Apply bootstrapping to generate more data
    num_bootstrap_samples = 5 # Number of bootstrap samples
    bootstrap_X = []
    bootstrap_y = []
    for _ in range(num_bootstrap_samples):
        # Resample with replacement
        bootstrap_indices = np.random.choice(len(X_poly), size=len(X_poly), replace=True)
        bootstrap_X.append(X_poly[bootstrap_indices])
        bootstrap_y.append(y[bootstrap_indices])

    bootstrap_X = np.vstack(bootstrap_X)
    bootstrap_y = np.vstack(bootstrap_y)

    # Split data into train and validation sets
    X_train, X_val, y_train, y_val = train_test_split(bootstrap_X, bootstrap_y, test_size=0.2, random_state=1)

    # Train the polynomial regression model
    model = LinearRegression()
    model.fit(X_train, y_train)

    # Make predictions on training and validation data
    y_train_pred = model.predict(X_train)
    y_val_pred = model.predict(X_val)

    mean_prediction = np.mean(y_train)  # Use mean of training data as baseline prediction

    # Calculate R² score for training and validation data
    train_r2 = r2_score(y_train, y_train_pred)
    validation_r2 = r2_score(y_val, y_val_pred)
    
    # Calculate MSE, RMSE, and MAE for training and validation data
    train_mse = mean_squared_error(y_train, y_train_pred)
    validation_mse = mean_squared_error(y_val, y_val_pred)
    train_rmse = np.sqrt(train_mse)
    validation_rmse = np.sqrt(validation_mse)
    train_mae = mean_absolute_error(y_train, y_train_pred)
    validation_mae = mean_absolute_error(y_val, y_val_pred)

    # Append metrics to the respective lists
    train_r2_scores.append(train_r2)
    validation_r2_scores.append(validation_r2)
    train_mse_scores.append(train_mse)
    validation_mse_scores.append(validation_mse)
    train_rmse_scores.append(train_rmse)
    validation_rmse_scores.append(validation_rmse)
    train_mae_scores.append(train_mae)
    validation_mae_scores.append(validation_mae)

    # Append baseline metrics to the respective lists
    baseline_r2_scores.append(r2_score(y_val, np.full_like(y_val, mean_prediction)))
    baseline_mse_scores.append(mean_squared_error(y_val, np.full_like(y_val, mean_prediction)))
    baseline_rmse_scores.append(np.sqrt(mean_squared_error(y_val, np.full_like(y_val, mean_prediction))))
    baseline_mae_scores.append(mean_absolute_error(y_val, np.full_like(y_val, mean_prediction)))

    # Plot the polynomial curve for the entire range (1972 to 2024)
        # Plot the training data
    plt.figure(figsize=(8, 6))
    plt.scatter(X_train[:, 1], y_train, color='blue', label='Training Data', alpha=0.7)  # Training data points
    sort_indices_train = np.argsort(X_train[:, 1])
    plt.plot(X_train[sort_indices_train, 1], y_train_pred[sort_indices_train], color='red', linewidth=2, label='Polynomial Fit (Train)')
    plt.axhline(y=mean_prediction, color='purple', linestyle='--', label='Baseline (Mean Prediction)')
    plt.xlabel('Feature (Index)')
    plt.ylabel('Target')
    plt.title(f'Row {index + 1} Training Data and Model - Train R²: {train_r2:.2f}')
    plt.legend()
    plt.show()

    # Plot the validation data
    plt.figure(figsize=(8, 6))
    plt.scatter(X_val[:, 1], y_val, color='green', label='Validation Data', alpha=0.7)  # Validation data points
    sort_indices_val = np.argsort(X_val[:, 1])
    plt.plot(X_val[sort_indices_val, 1], y_val_pred[sort_indices_val], color='orange', linewidth=2, label='Polynomial Fit (Validation)')
    plt.axhline(y=mean_prediction, color='purple', linestyle='--', label='Baseline (Mean Prediction)')
    plt.xlabel('Feature (Index)')
    plt.ylabel('Target')
    plt.title(f'Row {index + 1} Validation Data and Model - Validation R²: {validation_r2:.2f}')
    plt.legend()
    plt.show()

    # Plot the full range with baseline and validation points
    plt.figure(figsize=(8, 6))
    X_full = np.arange(1972, 2025).reshape(-1, 1)
    X_full_poly = poly.transform(X_full)  # Use transform instead of fit_transform
    y_full_pred = model.predict(X_full_poly)
    plt.scatter(X_val[:, 1], y_val, color='green', label='Validation Data', alpha=0.7)  # Validation data points
    plt.plot(X_full[:, 0], y_full_pred, color='purple', linewidth=2, label='Polynomial Fit (Full Range)')  # Accessing the first column of X_full
    plt.axhline(y=mean_prediction, color='purple', linestyle='--', label='Baseline (Mean Prediction)')
    plt.xlabel('Feature (Index)')
    plt.ylabel('Target')
    plt.title(f'Row {index + 1} Full Range and Baseline')
    plt.legend()
    plt.show()




    print(f"Row {index + 1} - Port Value: {port_value}")
    # Print the metrics for each row's model and baseline
    print(f"Row {index + 1}'s Model - Train R²: {train_r2:.2f}, Validation R²: {validation_r2:.2f}")
    print(f"Train MSE: {train_mse:.2f}, Validation MSE: {validation_mse:.2f}")
    print(f"Train RMSE: {train_rmse:.2f}, Validation RMSE: {validation_rmse:.2f}")
    print(f"Train MAE: {train_mae:.2f}, Validation MAE: {validation_mae:.2f}")
    print(f"Baseline R²: {baseline_r2_scores[-1]:.2f}, Baseline MSE: {baseline_mse_scores[-1]:.2f}, Baseline RMSE: {baseline_rmse_scores[-1]:.2f}, Baseline MAE: {baseline_mae_scores[-1]:.2f}")
    print(f"Theoretical Prediction for 2024: {y_full_pred[-1]}")  # Print the theoretical prediction for 2024
    print("\n")
Row 1 - Port Value: Albany (NY)
Row 1's Model - Train R²: 0.71, Validation R²: 0.66
Train MSE: 990781.16, Validation MSE: 901078.68
Train RMSE: 995.38, Validation RMSE: 949.25
Train MAE: 816.34, Validation MAE: 758.93
Baseline R²: -0.13, Baseline MSE: 2961999.76, Baseline RMSE: 1721.05, Baseline MAE: 1606.66
Theoretical Prediction for 2024: [190.17560482]


Row 2 - Port Value: Alpena (MI)
Row 2's Model - Train R²: 0.72, Validation R²: 0.64
Train MSE: 45361.37, Validation MSE: 50935.25
Train RMSE: 212.98, Validation RMSE: 225.69
Train MAE: 164.74, Validation MAE: 190.57
Baseline R²: -0.00, Baseline MSE: 141003.39, Baseline RMSE: 375.50, Baseline MAE: 326.19
Theoretical Prediction for 2024: [18437.83449364]


Row 3 - Port Value: Anacortes (WA)
Row 3's Model - Train R²: 0.90, Validation R²: 0.80
Train MSE: 1703605.95, Validation MSE: 2767148.70
Train RMSE: 1305.22, Validation RMSE: 1663.47
Train MAE: 1114.87, Validation MAE: 1455.68
Baseline R²: -0.01, Baseline MSE: 13799768.58, Baseline RMSE: 3714.80, Baseline MAE: 3263.89
Theoretical Prediction for 2024: [69390.93676949]


Row 4 - Port Value: Anchorage (AK)
Row 4's Model - Train R²: 0.68, Validation R²: 0.70
Train MSE: 117597.29, Validation MSE: 104211.18
Train RMSE: 342.92, Validation RMSE: 322.82
Train MAE: 262.41, Validation MAE: 245.46
Baseline R²: -0.00, Baseline MSE: 349361.53, Baseline RMSE: 591.07, Baseline MAE: 483.67
Theoretical Prediction for 2024: [22730.78290248]


Row 5 - Port Value: Ashtabula (OH)
Row 5's Model - Train R²: 0.48, Validation R²: 0.66
Train MSE: 9181267.33, Validation MSE: 5470474.22
Train RMSE: 3030.06, Validation RMSE: 2338.90
Train MAE: 2354.25, Validation MAE: 1916.85
Baseline R²: -0.00, Baseline MSE: 15988455.33, Baseline RMSE: 3998.56, Baseline MAE: 3484.76
Theoretical Prediction for 2024: [132953.11638069]


Row 6 - Port Value: Baltimore (MD)
Row 6's Model - Train R²: 0.46, Validation R²: 0.53
Train MSE: 25876140.65, Validation MSE: 20372504.46
Train RMSE: 5086.86, Validation RMSE: 4513.59
Train MAE: 4211.21, Validation MAE: 3564.66
Baseline R²: -0.00, Baseline MSE: 43200535.39, Baseline RMSE: 6572.71, Baseline MAE: 5174.94
Theoretical Prediction for 2024: [81350.95571601]


Row 7 - Port Value: Barbers Point (HI)
Row 7's Model - Train R²: 0.70, Validation R²: 0.68
Train MSE: 714840.36, Validation MSE: 576447.28
Train RMSE: 845.48, Validation RMSE: 759.24
Train MAE: 706.33, Validation MAE: 651.49
Baseline R²: -0.09, Baseline MSE: 1944110.40, Baseline RMSE: 1394.31, Baseline MAE: 1082.44
Theoretical Prediction for 2024: [-16397.88445091]


Row 8 - Port Value: Baton Rouge (LA)
Row 8's Model - Train R²: 0.66, Validation R²: 0.68
Train MSE: 32311480.34, Validation MSE: 30557984.69
Train RMSE: 5684.32, Validation RMSE: 5527.93
Train MAE: 4889.70, Validation MAE: 4926.74
Baseline R²: -0.00, Baseline MSE: 94979746.30, Baseline RMSE: 9745.76, Baseline MAE: 8244.97
Theoretical Prediction for 2024: [-140388.47880554]


Row 9 - Port Value: Beaumont (TX)
Row 9's Model - Train R²: 0.80, Validation R²: 0.80
Train MSE: 26025518.04, Validation MSE: 31587684.26
Train RMSE: 5101.52, Validation RMSE: 5620.29
Train MAE: 3785.21, Validation MAE: 3999.64
Baseline R²: -0.01, Baseline MSE: 162887181.83, Baseline RMSE: 12762.73, Baseline MAE: 9407.24
Theoretical Prediction for 2024: [1276929.58303833]


Row 10 - Port Value: Bellingham (WA)
Row 10's Model - Train R²: 0.52, Validation R²: 0.73
Train MSE: 68361.09, Validation MSE: 51655.57
Train RMSE: 261.46, Validation RMSE: 227.28
Train MAE: 222.26, Validation MAE: 184.68
Baseline R²: -0.01, Baseline MSE: 193755.39, Baseline RMSE: 440.18, Baseline MAE: 342.55
Theoretical Prediction for 2024: [-13426.74582028]


Row 11 - Port Value: Boston(MA)
Row 11's Model - Train R²: 0.65, Validation R²: 0.70
Train MSE: 3593923.43, Validation MSE: 2811203.65
Train RMSE: 1895.76, Validation RMSE: 1676.66
Train MAE: 1530.42, Validation MAE: 1281.26
Baseline R²: -0.01, Baseline MSE: 9557550.77, Baseline RMSE: 3091.53, Baseline MAE: 2610.78
Theoretical Prediction for 2024: [64950.48489761]


Row 12 - Port Value: Bridgeport (CT)
Row 12's Model - Train R²: 0.65, Validation R²: 0.64
Train MSE: 58768.62, Validation MSE: 38657.26
Train RMSE: 242.42, Validation RMSE: 196.61
Train MAE: 180.01, Validation MAE: 150.86
Baseline R²: -0.00, Baseline MSE: 108570.44, Baseline RMSE: 329.50, Baseline MAE: 258.29
Theoretical Prediction for 2024: [26177.87896395]


Row 13 - Port Value: Brownsville (TX)
Row 13's Model - Train R²: 0.67, Validation R²: 0.59
Train MSE: 157176.21, Validation MSE: 155344.76
Train RMSE: 396.45, Validation RMSE: 394.14
Train MAE: 309.56, Validation MAE: 291.25
Baseline R²: -0.01, Baseline MSE: 376900.14, Baseline RMSE: 613.92, Baseline MAE: 494.56
Theoretical Prediction for 2024: [23517.88686514]


Row 14 - Port Value: Charleston (SC)
Row 14's Model - Train R²: 0.40, Validation R²: 0.06
Train MSE: 4844237.91, Validation MSE: 8078660.03
Train RMSE: 2200.96, Validation RMSE: 2842.30
Train MAE: 1595.48, Validation MAE: 1988.25
Baseline R²: -0.27, Baseline MSE: 10956720.60, Baseline RMSE: 3310.09, Baseline MAE: 2860.14
Theoretical Prediction for 2024: [-140780.605793]


Row 15 - Port Value: Christiansted (VI)
Row 15's Model - Train R²: 0.59, Validation R²: 0.54
Train MSE: 49984336.58, Validation MSE: 43863708.64
Train RMSE: 7069.96, Validation RMSE: 6622.97
Train MAE: 4906.87, Validation MAE: 4386.43
Baseline R²: -0.08, Baseline MSE: 102763106.78, Baseline RMSE: 10137.21, Baseline MAE: 9351.62
Theoretical Prediction for 2024: [608856.2620163]


Row 16 - Port Value: Detroit (MI)
Row 16's Model - Train R²: 0.73, Validation R²: 0.34
Train MSE: 196463538.63, Validation MSE: 340419530.52
Train RMSE: 14016.55, Validation RMSE: 18450.46
Train MAE: 9733.73, Validation MAE: 11129.33
Baseline R²: -0.05, Baseline MSE: 539832409.96, Baseline RMSE: 23234.29, Baseline MAE: 21103.60
Theoretical Prediction for 2024: [722017.35684967]


Row 17 - Port Value: Drummond Island (MI)
Row 17's Model - Train R²: 0.47, Validation R²: 0.57
Train MSE: 28788754.80, Validation MSE: 25651850.42
Train RMSE: 5365.52, Validation RMSE: 5064.77
Train MAE: 3718.88, Validation MAE: 3446.03
Baseline R²: -0.01, Baseline MSE: 60163409.01, Baseline RMSE: 7756.51, Baseline MAE: 6643.76
Theoretical Prediction for 2024: [312415.85389709]


Row 18 - Port Value: Duluth/Superior (MI/WI)
Row 18's Model - Train R²: 0.54, Validation R²: 0.50
Train MSE: 132989599.91, Validation MSE: 154226434.84
Train RMSE: 11532.11, Validation RMSE: 12418.79
Train MAE: 8720.82, Validation MAE: 9258.03
Baseline R²: -0.01, Baseline MSE: 310456008.07, Baseline RMSE: 17619.76, Baseline MAE: 16898.98
Theoretical Prediction for 2024: [-845215.24963379]


Row 19 - Port Value: Freeport (TX)
Row 19's Model - Train R²: 0.49, Validation R²: 0.78
Train MSE: 28925110.03, Validation MSE: 9817129.46
Train RMSE: 5378.21, Validation RMSE: 3133.23
Train MAE: 3771.78, Validation MAE: 2445.58
Baseline R²: -0.08, Baseline MSE: 47999739.72, Baseline RMSE: 6928.18, Baseline MAE: 6650.51
Theoretical Prediction for 2024: [-271093.13784409]


Row 20 - Port Value: Galveston (TX)
Row 20's Model - Train R²: 0.59, Validation R²: 0.28
Train MSE: 11700452.72, Validation MSE: 20357089.45
Train RMSE: 3420.59, Validation RMSE: 4511.88
Train MAE: 2281.13, Validation MAE: 2733.71
Baseline R²: -0.14, Baseline MSE: 31993898.35, Baseline RMSE: 5656.31, Baseline MAE: 4387.11
Theoretical Prediction for 2024: [-20643.40958261]


Row 21 - Port Value: Hampton Roads (VA)
Row 21's Model - Train R²: 0.61, Validation R²: 0.69
Train MSE: 659413754.47, Validation MSE: 497181981.51
Train RMSE: 25679.05, Validation RMSE: 22297.58
Train MAE: 17989.06, Validation MAE: 14443.61
Baseline R²: -0.01, Baseline MSE: 1608369897.14, Baseline RMSE: 40104.49, Baseline MAE: 38598.70
Theoretical Prediction for 2024: [194977.04067993]


Row 22 - Port Value: Hilo (HI)
Row 22's Model - Train R²: 0.59, Validation R²: 0.61
Train MSE: 65595.63, Validation MSE: 81345.10
Train RMSE: 256.12, Validation RMSE: 285.21
Train MAE: 186.85, Validation MAE: 219.88
Baseline R²: -0.00, Baseline MSE: 207920.26, Baseline RMSE: 455.98, Baseline MAE: 289.60
Theoretical Prediction for 2024: [1356.7988691]


Row 23 - Port Value: Hueneme (CA)
Row 23's Model - Train R²: 0.65, Validation R²: 0.56
Train MSE: 9519850.46, Validation MSE: 16671452.11
Train RMSE: 3085.43, Validation RMSE: 4083.07
Train MAE: 1644.44, Validation MAE: 2266.73
Baseline R²: -0.04, Baseline MSE: 40024283.26, Baseline RMSE: 6326.47, Baseline MAE: 5056.08
Theoretical Prediction for 2024: [171627.0281105]


Row 24 - Port Value: Humboldt Bay (CA)
Row 24's Model - Train R²: 0.21, Validation R²: 0.14
Train MSE: 231745.75, Validation MSE: 338809.53
Train RMSE: 481.40, Validation RMSE: 582.07
Train MAE: 380.05, Validation MAE: 469.26
Baseline R²: -0.01, Baseline MSE: 397546.00, Baseline RMSE: 630.51, Baseline MAE: 481.97
Theoretical Prediction for 2024: [14389.53980541]


Row 25 - Port Value: Huron (OH)
Row 25's Model - Train R²: 0.35, Validation R²: 0.42
Train MSE: 24490270.70, Validation MSE: 24424560.39
Train RMSE: 4948.76, Validation RMSE: 4942.12
Train MAE: 4200.24, Validation MAE: 4188.24
Baseline R²: -0.03, Baseline MSE: 43732958.60, Baseline RMSE: 6613.09, Baseline MAE: 5468.72
Theoretical Prediction for 2024: [-62266.970191]


Row 26 - Port Value: Jacksonville (FL)
Row 26's Model - Train R²: 0.33, Validation R²: 0.17
Train MSE: 44954702.82, Validation MSE: 66032696.61
Train RMSE: 6704.83, Validation RMSE: 8126.05
Train MAE: 5872.56, Validation MAE: 7053.86
Baseline R²: -0.04, Baseline MSE: 82713718.09, Baseline RMSE: 9094.71, Baseline MAE: 8224.63
Theoretical Prediction for 2024: [-192678.25743103]


Row 27 - Port Value: Lorain (OH)
Row 27's Model - Train R²: 0.69, Validation R²: 0.75
Train MSE: 115701999.95, Validation MSE: 108625104.00
Train RMSE: 10756.49, Validation RMSE: 10422.34
Train MAE: 8730.33, Validation MAE: 9565.06
Baseline R²: -0.01, Baseline MSE: 438651599.33, Baseline RMSE: 20944.01, Baseline MAE: 19677.02
Theoretical Prediction for 2024: [953294.39849854]


Row 28 - Port Value: Marine City (MI)
Row 28's Model - Train R²: 0.87, Validation R²: 0.73
Train MSE: 113166.67, Validation MSE: 307668.27
Train RMSE: 336.40, Validation RMSE: 554.68
Train MAE: 229.36, Validation MAE: 378.69
Baseline R²: -0.00, Baseline MSE: 1154563.39, Baseline RMSE: 1074.51, Baseline MAE: 1006.54
Theoretical Prediction for 2024: [28098.64863634]


Row 29 - Port Value: Monroe (MI)
Row 29's Model - Train R²: 0.21, Validation R²: 0.37
Train MSE: 587232.32, Validation MSE: 756591.89
Train RMSE: 766.31, Validation RMSE: 869.82
Train MAE: 509.72, Validation MAE: 538.10
Baseline R²: -0.01, Baseline MSE: 1220534.67, Baseline RMSE: 1104.78, Baseline MAE: 739.28
Theoretical Prediction for 2024: [-49426.27476978]


Row 30 - Port Value: Muskegon (MI)
Row 30's Model - Train R²: 0.38, Validation R²: 0.22
Train MSE: 7041025.88, Validation MSE: 9181061.29
Train RMSE: 2653.49, Validation RMSE: 3030.03
Train MAE: 2245.82, Validation MAE: 2708.33
Baseline R²: -0.05, Baseline MSE: 12294049.70, Baseline RMSE: 3506.29, Baseline MAE: 3138.68
Theoretical Prediction for 2024: [64121.05968857]


Row 31 - Port Value: Nawiliwili (HI)
Row 31's Model - Train R²: 0.45, Validation R²: 0.66
Train MSE: 57187.88, Validation MSE: 47672.54
Train RMSE: 239.14, Validation RMSE: 218.34
Train MAE: 182.72, Validation MAE: 179.02
Baseline R²: -0.00, Baseline MSE: 141604.41, Baseline RMSE: 376.30, Baseline MAE: 294.97
Theoretical Prediction for 2024: [21491.81038547]


Row 32 - Port Value: New London (CT)
Row 32's Model - Train R²: 0.18, Validation R²: 0.08
Train MSE: 6159322.21, Validation MSE: 7124465.66
Train RMSE: 2481.80, Validation RMSE: 2669.17
Train MAE: 2047.05, Validation MAE: 2252.57
Baseline R²: -0.03, Baseline MSE: 7981954.44, Baseline RMSE: 2825.24, Baseline MAE: 2511.59
Theoretical Prediction for 2024: [2189.80269408]


Row 33 - Port Value: Olympia (WA)
Row 33's Model - Train R²: 0.45, Validation R²: 0.34
Train MSE: 9794882.70, Validation MSE: 10596596.69
Train RMSE: 3129.68, Validation RMSE: 3255.24
Train MAE: 2603.94, Validation MAE: 2728.10
Baseline R²: -0.04, Baseline MSE: 16589372.93, Baseline RMSE: 4073.01, Baseline MAE: 3603.95
Theoretical Prediction for 2024: [-168589.74404907]


Row 34 - Port Value: Orange (TX)
Row 34's Model - Train R²: 0.77, Validation R²: 0.82
Train MSE: 31501288.85, Validation MSE: 28111629.02
Train RMSE: 5612.60, Validation RMSE: 5302.04
Train MAE: 3715.15, Validation MAE: 3709.07
Baseline R²: -0.03, Baseline MSE: 157470707.60, Baseline RMSE: 12548.73, Baseline MAE: 10936.63
Theoretical Prediction for 2024: [59488.01167679]


Row 35 - Port Value: Oswego (NY)
Row 35's Model - Train R²: 0.62, Validation R²: 0.65
Train MSE: 330121973.96, Validation MSE: 306813769.25
Train RMSE: 18169.26, Validation RMSE: 17516.10
Train MAE: 14539.18, Validation MAE: 13448.28
Baseline R²: -0.01, Baseline MSE: 887321172.46, Baseline RMSE: 29787.94, Baseline MAE: 23300.99
Theoretical Prediction for 2024: [2250010.54302979]


Row 36 - Port Value: Palm Beach (FL)
Row 36's Model - Train R²: 0.80, Validation R²: 0.76
Train MSE: 452988.12, Validation MSE: 431940.70
Train RMSE: 673.04, Validation RMSE: 657.22
Train MAE: 489.41, Validation MAE: 517.77
Baseline R²: -0.00, Baseline MSE: 1815576.21, Baseline RMSE: 1347.43, Baseline MAE: 1181.71
Theoretical Prediction for 2024: [-82218.15686035]


Row 37 - Port Value: Pascagoula (MS)
Row 37's Model - Train R²: 0.66, Validation R²: 0.69
Train MSE: 97466340.72, Validation MSE: 67484981.76
Train RMSE: 9872.50, Validation RMSE: 8214.92
Train MAE: 7479.83, Validation MAE: 6759.03
Baseline R²: -0.00, Baseline MSE: 219015462.01, Baseline RMSE: 14799.17, Baseline MAE: 13685.44
Theoretical Prediction for 2024: [266962.31147766]


Row 38 - Port Value: Philadelphia (PA)
Row 38's Model - Train R²: 0.37, Validation R²: 0.17
Train MSE: 549685.95, Validation MSE: 676420.00
Train RMSE: 741.41, Validation RMSE: 822.45
Train MAE: 540.67, Validation MAE: 566.91
Baseline R²: -0.03, Baseline MSE: 840479.24, Baseline RMSE: 916.78, Baseline MAE: 736.86
Theoretical Prediction for 2024: [-9994.26999784]


Row 39 - Port Value: Plaquemines (LA)
Row 39's Model - Train R²: 0.69, Validation R²: 0.64
Train MSE: 84740167.67, Validation MSE: 95122211.05
Train RMSE: 9205.44, Validation RMSE: 9753.06
Train MAE: 6794.27, Validation MAE: 7527.46
Baseline R²: -0.06, Baseline MSE: 278581749.11, Baseline RMSE: 16690.77, Baseline MAE: 13943.80
Theoretical Prediction for 2024: [-838993.44750977]


Row 40 - Port Value: Ponce (PR)
Row 40's Model - Train R²: 0.29, Validation R²: 0.30
Train MSE: 12608227.69, Validation MSE: 10437096.59
Train RMSE: 3550.81, Validation RMSE: 3230.65
Train MAE: 2635.04, Validation MAE: 2384.69
Baseline R²: -0.07, Baseline MSE: 15874744.46, Baseline RMSE: 3984.31, Baseline MAE: 3787.70
Theoretical Prediction for 2024: [64787.60416794]


In [ ]: